rm(list=ls())
setwd("/Files for R/ReSTAT")
library("Matrix")
library('foreign')


Peerz=read.csv("IDtestDay_ReSTAT.csv")
chnan=Peerz$chscr2
rsknan=Peerz$rskscr2
Peerz$labelID=as.character(Peerz$labelID)
Peerz$labelID=factor(Peerz$labelID, labels=1:163)
Peerz$labelfac61=as.numeric(Peerz$labelfac61)

#convert dates into date format
Peerz$rec_in=as.Date(Peerz$rec_in, format='%d %b %y')
Peerz$rec_out=as.Date(Peerz$rec_out, format='%d %b %y')

##################################
### Build the peer matrix #########
###################################
# first generate an NxN matrix of null values 
short=matrix(0,length(Peerz$rec_out),length(Peerz$rec_out))

#Add a column to organize the data
Peerz=transform(Peerz,share=0, lastin=0, firstout=0)

#Build peer measures
system.time(
  for (k in 1:length(Peerz$numzz2)){
    print(k)
    #make the subset
    samefacility=subset(Peerz, with(Peerz,labelfac61==labelfac61[k] & rec_in<=rec_out[k] & rec_out>=rec_in[k]))
    #build the time shared variable (is there a quicker way?)
    samefacility$firstout=ifelse(samefacility$rec_out<Peerz$rec_out[k], samefacility$rec_out, Peerz$rec_out[k])
    samefacility$lastin  =ifelse(samefacility$rec_in >Peerz$rec_in[k],  samefacility$rec_in,  Peerz$rec_in[k])
    samefacility$share=samefacility$firstout-samefacility$lastin
    #adding values to the peer matrix
    for(j in 1:length(samefacility$numzz2)){
      short[k,samefacility$numzz2[j]]=samefacility$share[j]
      short[k,k]=0
    }
    
    rm(samefacility)
  }
)
short=Matrix(short, sparse=T)
save(short, file="shortID_DayReSTAT.RData") 


ones=rep(1,length(Peerz$numzz2))


########################################
####### begin here with the split sample ID test
########################################

repet<-10000
length<-length(Peerz$numzz2)
est<-matrix(NaN,4, repet)

system.time(
  for (k in 1:repet){
    print(k)
    group=rbinom(length,1,.5)
    angroup=ifelse(group==1,NaN,1)
    split=short*group
    split=t(split)
    totday=matrix(split%*%ones)
    Peerz$prskscore=as.vector(((split%*%Peerz$rskscr2)*angroup)/totday)
    Peerz$pchscore =as.vector(((split%*%Peerz$chscr2) *angroup)/totday)
    Peerz$risk=rsknan*angroup
    Peerz$ch=  chnan*angroup
    
    ## do the cluster bootstrap
    newfac<-sample(1:length(unique(Peerz$labelfac61)),length(unique(Peerz$labelfac61)),replace=T)
    df<-Peerz[Peerz$labelfac61==newfac[1],]
    for (j in 2:length(unique(Peerz$labelfac61))){
      dftemp<-Peerz[Peerz$labelfac61==newfac[j],]
      df<-rbind(df, dftemp)
    }
    zrsk=lm(prskscore~factor(df$labelfac613)+factor(df$quarter)+risk,df)
    zch= lm(pchscore ~factor(df$labelfac613)+factor(df$quarter)+ch, df)
    
    est[1,k]=coef(summary(zrsk))["risk","Estimate"]
    est[2,k]=coef(summary(zrsk))["risk","t value"]
    est[3,k]=coef(summary(zch))["ch","Estimate"]  
    est[4,k]=coef(summary(zch))["ch","t value"] 
  }
)

########################################
####  Analyze results
########################################
meanriskco=mean(est[1,], na.rm=T)
meanriskco
sdriskco=sd(est[1,],na.rm=T)
sdriskco

meanchco=mean(est[3,], na.rm=T)
meanchco
sdchco=sd(est[3,],na.rm=T)
sdchco